from textblob import TextBlob
import nltk
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud
import PIL.Image as image
import pandas as pd
import numpy as np
import os
import string
import matplotlib.pyplot as plt
# Load the aggregated top-15 dataset plus the 15 per-attraction review files
# ('01.csv' … '15.csv'), bound to the same module-level names as before.
top15 = pd.read_csv('top15.csv')
t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15 = (
    pd.read_csv('%02d.csv' % i) for i in range(1, 16)
)
# NOTE(review): the original pasted notebook-output lines such as
# "<AxesSubplot:xlabel='Samples', ylabel='Counts'>" into the source (a
# SyntaxError in a .py file); those are removed here. The unused
# comment_arr / l / subjectivity_arr assignments in every section are
# dropped as well, and the 26 copy-pasted pipelines are collapsed into
# one helper called once per dataset/column.
def plot_word_stats(texts, freq_title, mask_path, cloud_title=None):
    """Plot a top-30 word-frequency chart and a masked word cloud for *texts*.

    Pipeline (same order as the original cells): concatenate the texts with
    "." separators, strip punctuation, tokenize on word characters,
    lowercase, drop English stopwords, lemmatize; then (1) plot the 30 most
    frequent words and (2) render a word cloud (digit-only tokens excluded)
    shaped by the mask image.

    Parameters
    ----------
    texts : iterable of str
        The comment texts to analyze (e.g. a DataFrame column).
    freq_title : str
        Title placed above the top-30 frequency plot.
    mask_path : str
        Path of the image used as the word-cloud mask.
    cloud_title : str, optional
        Title placed above the word-cloud figure; no title is drawn when
        None (most sections of the original had none).
    """
    # Build the corpus in one pass (the original used quadratic +=),
    # then remove punctuation before tokenizing.
    corpus = "".join("." + t for t in texts)
    corpus = "".join(ch for ch in corpus if ch not in string.punctuation)
    tokens = nltk.tokenize.RegexpTokenizer(r'\w+').tokenize(corpus)

    # Lowercase -> stopword filter -> lemmatize, exactly as before
    # (the stopword test runs on the lowercased, pre-lemmatized token).
    stop_words = set(nltk.corpus.stopwords.words('english'))
    wn = WordNetLemmatizer()
    lowered = [t.lower() for t in tokens]
    words = [wn.lemmatize(w) for w in lowered if w not in stop_words]

    # Top-30 frequency plot; FreqDist.plot draws (and shows) the figure.
    freq_dist = nltk.FreqDist(words)
    plt.figure(figsize=(10, 6))
    plt.figtext(.5, .9, freq_title, fontsize=20, ha='center')
    freq_dist.plot(30)

    # Word cloud over the non-numeric tokens, shaped by the mask image.
    cloud_text = ' '.join(w for w in words if not w.isdigit())
    plt.figure(figsize=(20, 16))
    mask = np.array(image.open(mask_path))
    wordcloud = WordCloud(mask=mask,
                          background_color='white',
                          max_words=300,
                          max_font_size=400,
                          width=2000,
                          height=1666).generate(cloud_text)
    plt.imshow(wordcloud)
    if cloud_title is not None:
        plt.figtext(.5, .9, cloud_title, fontsize=20, ha='center')
    plt.axis('off')
    plt.show()


# All tourists: comment titles ("main" column).
plot_word_stats(top15["main"], 'Top30 words in comment title of all tourists',
                'glasgow.jpg', 'Top200 words in comment title of tourists')

# Per attraction: full comment text ("content") and comment title ("main").
plot_word_stats(t1["content"], 'Top30 words in comment of Kelvingrove Museum',
                '01.jpg', 'Top200 words in comment of Kelvingrove Museum')
plot_word_stats(t1["main"], 'Top30 words in comment title of Kelvingrove Museum', '01.jpg')
plot_word_stats(t2["content"], 'Top30 words in comment of The Riverside Museum', '02.jpg')
plot_word_stats(t2["main"], 'Top30 words in comment title of The Riverside Museum', '02.jpg')
plot_word_stats(t3["content"], 'Top30 words in comment of Glengoyne Distillery', '03.jpg')
plot_word_stats(t3["main"], 'Top30 words in comment title of Glengoyne Distillery', '03.jpg')
plot_word_stats(t4["content"], 'Top30 words in comment of Celtic Park', '04.jpg')
plot_word_stats(t4["main"], 'Top30 words in comment title of Celtic Park', '04.jpg')
plot_word_stats(t5["content"], 'Top30 words in comment of University of Glasgow', '05.jpg')
plot_word_stats(t5["main"], 'Top30 words in comment title of University of Glasgow', '05.jpg')
plot_word_stats(t6["content"], 'Top30 words in comment of The Necropolis', '06.jpg')
plot_word_stats(t6["main"], 'Top30 words in comment title of The Necropolis', '06.jpg')
plot_word_stats(t7["content"], 'Top30 words in comment of Tennents Wellpark Brewery', '07.jpg')
plot_word_stats(t7["main"], 'Top30 words in comment title of Tennents Wellpark Brewery', '07.jpg')
plot_word_stats(t8["content"], 'Top30 words in comment of The Clydeside Distillery', '08.jpg')
plot_word_stats(t8["main"], 'Top30 words in comment title of The Clydeside Distillery', '08.jpg')
plot_word_stats(t9["content"], 'Top30 words in comment of Ibrox Stadium', '09.jpg')
plot_word_stats(t9["main"], 'Top30 words in comment title of Ibrox Stadium', '09.jpg')
plot_word_stats(t10["content"], 'Top30 words in comment of Glasgow Botanic Gardens', '10.jpg')
plot_word_stats(t10["main"], 'Top30 words in comment title of Glasgow Botanic Gardens', '10.jpg')
plot_word_stats(t11["content"], 'Top30 words in comment of Glasgow Science Centre', '11.jpg')
plot_word_stats(t11["main"], 'Top30 words in comment title of Glasgow Science Centre', '11.jpg')
plot_word_stats(t12["content"], 'Top30 words in comment of Glasgow Cathedral', '12.jpg')
plot_word_stats(t12["main"], 'Top30 words in comment title of Glasgow Cathedral', '12.jpg')
plot_word_stats(t13["content"], 'Top30 words in comment of Buchanan Street', '13.jpg')
# --- t13 comment titles ("main" column): frequency plot and word cloud ---
# Fixes vs. the notebook export: re-indented loop logic (the flattened bodies
# were syntax errors), removed the "<AxesSubplot...>" output-residue line,
# dropped the dead `comment_arr = t13.to_numpy()` that was immediately
# overwritten, used str.join instead of quadratic `+=` concatenation, a raw
# string for the regex, and a set for O(1) stopword lookups.
comment_arr = []        # kept: later (out-of-view) sentiment code may append here
subjectivity_arr = []
l = len(t13)

# One corpus string, "."-separated exactly as the original loop produced.
text_corpus = "." + ".".join(t13["main"])

# Strip punctuation, tokenize on word characters, lowercase.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords, then lemmatize.
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Plot the 30 most frequent lemmas.
freq_dist_text = nltk.FreqDist(lem_final_words)
plt.figure(figsize=(10, 6))
plt.figtext(.5, .9, 'Top30 words in comment title of Buchanan Street', fontsize=20, ha='center')
freq_dist_text.plot(30)

# Word cloud shaped by the attraction photo; purely numeric tokens excluded.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())
plt.figure(figsize=(20, 16))
mask = np.array(image.open('13.jpg'))
wordcloud = WordCloud(mask=mask,
                      background_color='white',
                      max_words=300,
                      max_font_size=400,
                      width=2000,
                      height=1666).generate(res_text)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
# --- t14 review texts ("content" column): frequency plot and word cloud ---
# Fixes vs. the notebook export: re-indented loop logic (the flattened bodies
# were syntax errors), removed the "<AxesSubplot...>" output-residue line,
# dropped the dead `comment_arr = t14.to_numpy()` that was immediately
# overwritten, used str.join instead of quadratic `+=` concatenation, a raw
# string for the regex, and a set for O(1) stopword lookups.
comment_arr = []        # kept: later (out-of-view) sentiment code may append here
subjectivity_arr = []
l = len(t14)

# One corpus string, "."-separated exactly as the original loop produced.
text_corpus = "." + ".".join(t14["content"])

# Strip punctuation, tokenize on word characters, lowercase.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords, then lemmatize.
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Plot the 30 most frequent lemmas.
freq_dist_text = nltk.FreqDist(lem_final_words)
plt.figure(figsize=(10, 6))
plt.figtext(.5, .9, 'Top30 words in comment of Pollok Country Park', fontsize=20, ha='center')
freq_dist_text.plot(30)

# Word cloud shaped by the attraction photo; purely numeric tokens excluded.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())
plt.figure(figsize=(20, 16))
mask = np.array(image.open('14.jpg'))
wordcloud = WordCloud(mask=mask,
                      background_color='white',
                      max_words=300,
                      max_font_size=400,
                      width=2000,
                      height=1666).generate(res_text)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
# --- t14 comment titles ("main" column): frequency plot and word cloud ---
# Fixes vs. the notebook export: re-indented loop logic (the flattened bodies
# were syntax errors), removed the "<AxesSubplot...>" output-residue line,
# dropped the dead `comment_arr = t14.to_numpy()` that was immediately
# overwritten, used str.join instead of quadratic `+=` concatenation, a raw
# string for the regex, and a set for O(1) stopword lookups.
comment_arr = []        # kept: later (out-of-view) sentiment code may append here
subjectivity_arr = []
l = len(t14)

# One corpus string, "."-separated exactly as the original loop produced.
text_corpus = "." + ".".join(t14["main"])

# Strip punctuation, tokenize on word characters, lowercase.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords, then lemmatize.
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Plot the 30 most frequent lemmas.
freq_dist_text = nltk.FreqDist(lem_final_words)
plt.figure(figsize=(10, 6))
plt.figtext(.5, .9, 'Top30 words in comment title of Pollok Country Park', fontsize=20, ha='center')
freq_dist_text.plot(30)

# Word cloud shaped by the attraction photo; purely numeric tokens excluded.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())
plt.figure(figsize=(20, 16))
mask = np.array(image.open('14.jpg'))
wordcloud = WordCloud(mask=mask,
                      background_color='white',
                      max_words=300,
                      max_font_size=400,
                      width=2000,
                      height=1666).generate(res_text)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
# --- t15 review texts ("content" column): frequency plot and word cloud ---
# Fixes vs. the notebook export: re-indented loop logic (the flattened bodies
# were syntax errors), removed the "<AxesSubplot...>" output-residue line,
# dropped the dead `comment_arr = t15.to_numpy()` that was immediately
# overwritten, used str.join instead of quadratic `+=` concatenation, a raw
# string for the regex, and a set for O(1) stopword lookups.
comment_arr = []        # kept: later (out-of-view) sentiment code may append here
subjectivity_arr = []
l = len(t15)

# One corpus string, "."-separated exactly as the original loop produced.
text_corpus = "." + ".".join(t15["content"])

# Strip punctuation, tokenize on word characters, lowercase.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords, then lemmatize.
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Plot the 30 most frequent lemmas.
freq_dist_text = nltk.FreqDist(lem_final_words)
plt.figure(figsize=(10, 6))
plt.figtext(.5, .9, 'Top30 words in comment of The Tenement House', fontsize=20, ha='center')
freq_dist_text.plot(30)

# Word cloud shaped by the attraction photo; purely numeric tokens excluded.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())
plt.figure(figsize=(20, 16))
mask = np.array(image.open('15.jpg'))
wordcloud = WordCloud(mask=mask,
                      background_color='white',
                      max_words=300,
                      max_font_size=400,
                      width=2000,
                      height=1666).generate(res_text)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
# --- t15 comment titles ("main" column): frequency plot and word cloud ---
# Fixes vs. the notebook export: re-indented loop logic (the flattened bodies
# were syntax errors), removed the "<AxesSubplot...>" output-residue line,
# dropped the dead `comment_arr = t15.to_numpy()` that was immediately
# overwritten, used str.join instead of quadratic `+=` concatenation, a raw
# string for the regex, and a set for O(1) stopword lookups.
comment_arr = []        # kept: later (out-of-view) sentiment code may append here
subjectivity_arr = []
l = len(t15)

# One corpus string, "."-separated exactly as the original loop produced.
text_corpus = "." + ".".join(t15["main"])

# Strip punctuation, tokenize on word characters, lowercase.
text_nopunct = "".join(ch for ch in text_corpus if ch not in string.punctuation)
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
text_tokens = tokenizer.tokenize(text_nopunct)
text_words = [tok.lower() for tok in text_tokens]

# Drop English stopwords, then lemmatize.
stopwords = nltk.corpus.stopwords.words('english')
stopword_set = set(stopwords)
final_words = [w for w in text_words if w not in stopword_set]
wn = WordNetLemmatizer()
lem_final_words = [wn.lemmatize(w) for w in final_words]

# Plot the 30 most frequent lemmas.
freq_dist_text = nltk.FreqDist(lem_final_words)
plt.figure(figsize=(10, 6))
plt.figtext(.5, .9, 'Top30 words in comment title of The Tenement House', fontsize=20, ha='center')
freq_dist_text.plot(30)

# Word cloud shaped by the attraction photo; purely numeric tokens excluded.
res_text = ' '.join(w for w in lem_final_words if not w.isdigit())
plt.figure(figsize=(20, 16))
mask = np.array(image.open('15.jpg'))
wordcloud = WordCloud(mask=mask,
                      background_color='white',
                      max_words=300,
                      max_font_size=400,
                      width=2000,
                      height=1666).generate(res_text)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()